{
 "cells": [
  {
   "cell_type": "code",
   "id": "initial_id",
   "metadata": {
    "collapsed": true,
    "ExecuteTime": {
     "end_time": "2025-07-20T06:54:12.134991Z",
     "start_time": "2025-07-20T06:54:12.131074Z"
    }
   },
   "source": [
    "import os\n",
    "import sys\n",
    "from pathlib import Path\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "\n",
    "# Locate the project root by walking up from the kernel's working directory\n",
    "# until a folder containing the `src/utils` directory is found. This replaces\n",
    "# a hard-coded absolute Windows path that only existed on one machine.\n",
    "_start_dir = Path.cwd().resolve()\n",
    "PROJECT_ROOT = next(\n",
    "    (d for d in (_start_dir, *_start_dir.parents) if (d / \"src\" / \"utils\").is_dir()),\n",
    "    None,\n",
    ")\n",
    "if PROJECT_ROOT is None:\n",
    "    raise FileNotFoundError(\n",
    "        f\"Could not find a project root containing 'src/utils' above {_start_dir}\"\n",
    "    )\n",
    "\n",
    "# Keep the original side effect: the project root becomes the working directory.\n",
    "# It is also put on sys.path so `src` is importable regardless of kernel setup.\n",
    "os.chdir(PROJECT_ROOT)\n",
    "if str(PROJECT_ROOT) not in sys.path:\n",
    "    sys.path.insert(0, str(PROJECT_ROOT))\n",
    "\n",
    "# Import the project's path configuration.\n",
    "from src.utils.paths import RAW_DATA_DIR, PROCESSED_DATA_DIR\n"
   ],
   "outputs": [],
   "execution_count": 27
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-07-20T06:44:45.693656Z",
     "start_time": "2025-07-20T06:44:45.679053Z"
    }
   },
   "cell_type": "code",
   "source": [
    "# Load the raw training set from RAW_DATA_DIR and print its schema\n",
    "# (column names, non-null counts and dtypes).\n",
    "df = pd.read_csv(RAW_DATA_DIR / \"train.csv\")\n",
    "df.info()"
   ],
   "id": "27fbbd8f4dfe2558",
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 1100 entries, 0 to 1099\n",
      "Data columns (total 31 columns):\n",
      " #   Column                    Non-Null Count  Dtype \n",
      "---  ------                    --------------  ----- \n",
      " 0   Attrition                 1100 non-null   int64 \n",
      " 1   Age                       1100 non-null   int64 \n",
      " 2   BusinessTravel            1100 non-null   object\n",
      " 3   Department                1100 non-null   object\n",
      " 4   DistanceFromHome          1100 non-null   int64 \n",
      " 5   Education                 1100 non-null   int64 \n",
      " 6   EducationField            1100 non-null   object\n",
      " 7   EmployeeNumber            1100 non-null   int64 \n",
      " 8   EnvironmentSatisfaction   1100 non-null   int64 \n",
      " 9   Gender                    1100 non-null   object\n",
      " 10  JobInvolvement            1100 non-null   int64 \n",
      " 11  JobLevel                  1100 non-null   int64 \n",
      " 12  JobRole                   1100 non-null   object\n",
      " 13  JobSatisfaction           1100 non-null   int64 \n",
      " 14  MaritalStatus             1100 non-null   object\n",
      " 15  MonthlyIncome             1100 non-null   int64 \n",
      " 16  NumCompaniesWorked        1100 non-null   int64 \n",
      " 17  Over18                    1100 non-null   object\n",
      " 18  OverTime                  1100 non-null   object\n",
      " 19  PercentSalaryHike         1100 non-null   int64 \n",
      " 20  PerformanceRating         1100 non-null   int64 \n",
      " 21  RelationshipSatisfaction  1100 non-null   int64 \n",
      " 22  StandardHours             1100 non-null   int64 \n",
      " 23  StockOptionLevel          1100 non-null   int64 \n",
      " 24  TotalWorkingYears         1100 non-null   int64 \n",
      " 25  TrainingTimesLastYear     1100 non-null   int64 \n",
      " 26  WorkLifeBalance           1100 non-null   int64 \n",
      " 27  YearsAtCompany            1100 non-null   int64 \n",
      " 28  YearsInCurrentRole        1100 non-null   int64 \n",
      " 29  YearsSinceLastPromotion   1100 non-null   int64 \n",
      " 30  YearsWithCurrManager      1100 non-null   int64 \n",
      "dtypes: int64(23), object(8)\n",
      "memory usage: 266.5+ KB\n"
     ]
    }
   ],
   "execution_count": 2
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-07-20T06:44:45.715310Z",
     "start_time": "2025-07-20T06:44:45.709258Z"
    }
   },
   "cell_type": "code",
   "source": [
    "# Count the distinct values in every column; columns with a single value\n",
    "# carry no information, and columns with one value per row act as identifiers.\n",
    "unique_counts = df.nunique()\n",
    "\n",
    "print(unique_counts)"
   ],
   "id": "5492966c2e0a0333",
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Attrition                      2\n",
      "Age                           43\n",
      "BusinessTravel                 3\n",
      "Department                     3\n",
      "DistanceFromHome              29\n",
      "Education                      5\n",
      "EducationField                 6\n",
      "EmployeeNumber              1100\n",
      "EnvironmentSatisfaction        4\n",
      "Gender                         2\n",
      "JobInvolvement                 4\n",
      "JobLevel                       5\n",
      "JobRole                        9\n",
      "JobSatisfaction                4\n",
      "MaritalStatus                  3\n",
      "MonthlyIncome               1028\n",
      "NumCompaniesWorked            10\n",
      "Over18                         1\n",
      "OverTime                       2\n",
      "PercentSalaryHike             15\n",
      "PerformanceRating              2\n",
      "RelationshipSatisfaction       4\n",
      "StandardHours                  1\n",
      "StockOptionLevel               4\n",
      "TotalWorkingYears             40\n",
      "TrainingTimesLastYear          7\n",
      "WorkLifeBalance                4\n",
      "YearsAtCompany                35\n",
      "YearsInCurrentRole            19\n",
      "YearsSinceLastPromotion       16\n",
      "YearsWithCurrManager          18\n",
      "dtype: int64\n"
     ]
    }
   ],
   "execution_count": 3
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-07-20T06:44:45.744107Z",
     "start_time": "2025-07-20T06:44:45.740197Z"
    }
   },
   "cell_type": "code",
   "source": [
    "# Drop uninformative columns: Over18 and StandardHours hold a single value,\n",
    "# and EmployeeNumber is unique for every row (see the nunique() output).\n",
    "# errors='ignore' keeps this cell idempotent: re-running it after the columns\n",
    "# are already gone no longer raises a KeyError.\n",
    "df = df.drop(columns=['Over18', 'StandardHours', 'EmployeeNumber'], errors='ignore')"
   ],
   "id": "a15da195caad0b13",
   "outputs": [],
   "execution_count": 4
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-07-20T06:44:45.770067Z",
     "start_time": "2025-07-20T06:44:45.762120Z"
    }
   },
   "cell_type": "code",
   "source": [
    "# Headcount and number of leavers per department. Attrition is a 0/1 flag,\n",
    "# so summing it counts the employees who left.\n",
    "dept_stats = df.groupby('Department')['Attrition'].agg(\n",
    "    TotalCount='count',\n",
    "    AttritionCount='sum',\n",
    ")\n",
    "\n",
    "# Attrition rate = leavers / headcount.\n",
    "dept_stats['AttritionRate'] = dept_stats['AttritionCount'].div(dept_stats['TotalCount'])\n",
    "\n",
    "# Show the per-department summary.\n",
    "print(dept_stats)\n"
   ],
   "id": "e9e05440ccbf54e1",
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "                        TotalCount  AttritionCount  AttritionRate\n",
      "Department                                                       \n",
      "Human Resources                 42               9       0.214286\n",
      "Research & Development         727             102       0.140303\n",
      "Sales                          331              67       0.202417\n"
     ]
    }
   ],
   "execution_count": 5
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-07-20T06:44:45.789315Z",
     "start_time": "2025-07-20T06:44:45.784308Z"
    }
   },
   "cell_type": "code",
   "source": [
    "# List the distinct job roles present in the training data.\n",
    "df[\"JobRole\"].unique()"
   ],
   "id": "8ee5022084ffaacb",
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array(['Manufacturing Director', 'Laboratory Technician',\n",
       "       'Sales Executive', 'Research Scientist',\n",
       "       'Healthcare Representative', 'Human Resources',\n",
       "       'Sales Representative', 'Research Director', 'Manager'],\n",
       "      dtype=object)"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 6
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-07-20T06:44:45.814473Z",
     "start_time": "2025-07-20T06:44:45.803133Z"
    }
   },
   "cell_type": "code",
   "source": [
    "# Display the training frame as it stands after the column drop\n",
    "# (pandas truncates the rendering of large frames).\n",
    "df"
   ],
   "id": "926d23e067a3bb3",
   "outputs": [
    {
     "data": {
      "text/plain": [
       "      Attrition  Age     BusinessTravel              Department  \\\n",
       "0             0   37      Travel_Rarely  Research & Development   \n",
       "1             0   54  Travel_Frequently  Research & Development   \n",
       "2             1   34  Travel_Frequently  Research & Development   \n",
       "3             0   39      Travel_Rarely  Research & Development   \n",
       "4             1   28  Travel_Frequently  Research & Development   \n",
       "...         ...  ...                ...                     ...   \n",
       "1095          0   35      Travel_Rarely  Research & Development   \n",
       "1096          0   38      Travel_Rarely                   Sales   \n",
       "1097          0   37      Travel_Rarely                   Sales   \n",
       "1098          1   22      Travel_Rarely  Research & Development   \n",
       "1099          1   26  Travel_Frequently  Research & Development   \n",
       "\n",
       "      DistanceFromHome  Education EducationField  EnvironmentSatisfaction  \\\n",
       "0                    1          4  Life Sciences                        1   \n",
       "1                    1          4  Life Sciences                        4   \n",
       "2                    7          3  Life Sciences                        1   \n",
       "3                    1          1  Life Sciences                        4   \n",
       "4                    1          3        Medical                        1   \n",
       "...                ...        ...            ...                      ...   \n",
       "1095                23          4        Medical                        3   \n",
       "1096                 2          4      Marketing                        2   \n",
       "1097                16          4      Marketing                        4   \n",
       "1098                 7          1  Life Sciences                        4   \n",
       "1099                 2          3  Life Sciences                        1   \n",
       "\n",
       "      Gender  JobInvolvement  ...  PerformanceRating RelationshipSatisfaction  \\\n",
       "0       Male               2  ...                  3                        3   \n",
       "1     Female               3  ...                  3                        1   \n",
       "2       Male               1  ...                  4                        4   \n",
       "3     Female               2  ...                  3                        3   \n",
       "4       Male               2  ...                  3                        1   \n",
       "...      ...             ...  ...                ...                      ...   \n",
       "1095  Female               3  ...                  3                        3   \n",
       "1096  Female               1  ...                  4                        1   \n",
       "1097    Male               2  ...                  3                        4   \n",
       "1098    Male               3  ...                  4                        1   \n",
       "1099    Male               3  ...                  3                        2   \n",
       "\n",
       "      StockOptionLevel TotalWorkingYears  TrainingTimesLastYear  \\\n",
       "0                    1                 7                      2   \n",
       "1                    1                33                      2   \n",
       "2                    0                 9                      3   \n",
       "3                    1                21                      3   \n",
       "4                    2                 1                      2   \n",
       "...                ...               ...                    ...   \n",
       "1095                 1                 4                      3   \n",
       "1096                 2                20                      4   \n",
       "1097                 2                 9                      2   \n",
       "1098                 0                 1                      2   \n",
       "1099                 1                 6                      2   \n",
       "\n",
       "      WorkLifeBalance YearsAtCompany  YearsInCurrentRole  \\\n",
       "0                   4              7                   5   \n",
       "1                   1              5                   4   \n",
       "2                   3              9                   7   \n",
       "3                   3             21                   6   \n",
       "4                   3              1                   0   \n",
       "...               ...            ...                 ...   \n",
       "1095                3              2                   2   \n",
       "1096                2              4                   2   \n",
       "1097                3              1                   0   \n",
       "1098                3              1                   0   \n",
       "1099                3              3                   2   \n",
       "\n",
       "      YearsSinceLastPromotion  YearsWithCurrManager  \n",
       "0                           0                     7  \n",
       "1                           1                     4  \n",
       "2                           0                     6  \n",
       "3                          11                     8  \n",
       "4                           0                     0  \n",
       "...                       ...                   ...  \n",
       "1095                        2                     2  \n",
       "1096                        0                     3  \n",
       "1097                        0                     0  \n",
       "1098                        0                     0  \n",
       "1099                        1                     2  \n",
       "\n",
       "[1100 rows x 28 columns]"
      ],
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Attrition</th>\n",
       "      <th>Age</th>\n",
       "      <th>BusinessTravel</th>\n",
       "      <th>Department</th>\n",
       "      <th>DistanceFromHome</th>\n",
       "      <th>Education</th>\n",
       "      <th>EducationField</th>\n",
       "      <th>EnvironmentSatisfaction</th>\n",
       "      <th>Gender</th>\n",
       "      <th>JobInvolvement</th>\n",
       "      <th>...</th>\n",
       "      <th>PerformanceRating</th>\n",
       "      <th>RelationshipSatisfaction</th>\n",
       "      <th>StockOptionLevel</th>\n",
       "      <th>TotalWorkingYears</th>\n",
       "      <th>TrainingTimesLastYear</th>\n",
       "      <th>WorkLifeBalance</th>\n",
       "      <th>YearsAtCompany</th>\n",
       "      <th>YearsInCurrentRole</th>\n",
       "      <th>YearsSinceLastPromotion</th>\n",
       "      <th>YearsWithCurrManager</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>37</td>\n",
       "      <td>Travel_Rarely</td>\n",
       "      <td>Research &amp; Development</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>Life Sciences</td>\n",
       "      <td>1</td>\n",
       "      <td>Male</td>\n",
       "      <td>2</td>\n",
       "      <td>...</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>7</td>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "      <td>7</td>\n",
       "      <td>5</td>\n",
       "      <td>0</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0</td>\n",
       "      <td>54</td>\n",
       "      <td>Travel_Frequently</td>\n",
       "      <td>Research &amp; Development</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>Life Sciences</td>\n",
       "      <td>4</td>\n",
       "      <td>Female</td>\n",
       "      <td>3</td>\n",
       "      <td>...</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>33</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>5</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>34</td>\n",
       "      <td>Travel_Frequently</td>\n",
       "      <td>Research &amp; Development</td>\n",
       "      <td>7</td>\n",
       "      <td>3</td>\n",
       "      <td>Life Sciences</td>\n",
       "      <td>1</td>\n",
       "      <td>Male</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>9</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>9</td>\n",
       "      <td>7</td>\n",
       "      <td>0</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0</td>\n",
       "      <td>39</td>\n",
       "      <td>Travel_Rarely</td>\n",
       "      <td>Research &amp; Development</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>Life Sciences</td>\n",
       "      <td>4</td>\n",
       "      <td>Female</td>\n",
       "      <td>2</td>\n",
       "      <td>...</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>21</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>21</td>\n",
       "      <td>6</td>\n",
       "      <td>11</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1</td>\n",
       "      <td>28</td>\n",
       "      <td>Travel_Frequently</td>\n",
       "      <td>Research &amp; Development</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>Medical</td>\n",
       "      <td>1</td>\n",
       "      <td>Male</td>\n",
       "      <td>2</td>\n",
       "      <td>...</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1095</th>\n",
       "      <td>0</td>\n",
       "      <td>35</td>\n",
       "      <td>Travel_Rarely</td>\n",
       "      <td>Research &amp; Development</td>\n",
       "      <td>23</td>\n",
       "      <td>4</td>\n",
       "      <td>Medical</td>\n",
       "      <td>3</td>\n",
       "      <td>Female</td>\n",
       "      <td>3</td>\n",
       "      <td>...</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1096</th>\n",
       "      <td>0</td>\n",
       "      <td>38</td>\n",
       "      <td>Travel_Rarely</td>\n",
       "      <td>Sales</td>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "      <td>Marketing</td>\n",
       "      <td>2</td>\n",
       "      <td>Female</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>20</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1097</th>\n",
       "      <td>0</td>\n",
       "      <td>37</td>\n",
       "      <td>Travel_Rarely</td>\n",
       "      <td>Sales</td>\n",
       "      <td>16</td>\n",
       "      <td>4</td>\n",
       "      <td>Marketing</td>\n",
       "      <td>4</td>\n",
       "      <td>Male</td>\n",
       "      <td>2</td>\n",
       "      <td>...</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>9</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1098</th>\n",
       "      <td>1</td>\n",
       "      <td>22</td>\n",
       "      <td>Travel_Rarely</td>\n",
       "      <td>Research &amp; Development</td>\n",
       "      <td>7</td>\n",
       "      <td>1</td>\n",
       "      <td>Life Sciences</td>\n",
       "      <td>4</td>\n",
       "      <td>Male</td>\n",
       "      <td>3</td>\n",
       "      <td>...</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1099</th>\n",
       "      <td>1</td>\n",
       "      <td>26</td>\n",
       "      <td>Travel_Frequently</td>\n",
       "      <td>Research &amp; Development</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>Life Sciences</td>\n",
       "      <td>1</td>\n",
       "      <td>Male</td>\n",
       "      <td>3</td>\n",
       "      <td>...</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>6</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1100 rows × 28 columns</p>\n",
       "</div>"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 7
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-07-20T06:44:45.871795Z",
     "start_time": "2025-07-20T06:44:45.853893Z"
    }
   },
   "cell_type": "code",
   "source": [
    "# Categorical (object-dtype) columns of the raw data:\n",
    "# BusinessTravel, Department, EducationField, Gender, JobRole, MaritalStatus, OverTime\n",
    "# NOTE(review): presumably these are the columns that were one-hot encoded to\n",
    "# produce train_ready.csv -- confirm against the preprocessing step.\n",
    "# Load the processed training set from PROCESSED_DATA_DIR and display it.\n",
    "df2 = pd.read_csv(PROCESSED_DATA_DIR / \"train_ready.csv\")\n",
    "df2"
   ],
   "id": "67f7360ddca4103d",
   "outputs": [
    {
     "data": {
      "text/plain": [
       "      Attrition  Age  DistanceFromHome  Education  EnvironmentSatisfaction  \\\n",
       "0             0   37                 1          4                        1   \n",
       "1             0   54                 1          4                        4   \n",
       "2             1   34                 7          3                        1   \n",
       "3             0   39                 1          1                        4   \n",
       "4             1   28                 1          3                        1   \n",
       "...         ...  ...               ...        ...                      ...   \n",
       "1095          0   35                23          4                        3   \n",
       "1096          0   38                 2          4                        2   \n",
       "1097          0   37                16          4                        4   \n",
       "1098          1   22                 7          1                        4   \n",
       "1099          1   26                 2          3                        1   \n",
       "\n",
       "      JobInvolvement  JobLevel  JobSatisfaction  MonthlyIncome  \\\n",
       "0                  2         2                3           5993   \n",
       "1                  3         3                3          10502   \n",
       "2                  1         2                3           6074   \n",
       "3                  2         4                4          12742   \n",
       "4                  2         1                2           2596   \n",
       "...              ...       ...              ...            ...   \n",
       "1095               3         1                1           4014   \n",
       "1096               1         2                4           5405   \n",
       "1097               2         2                3           6334   \n",
       "1098               3         1                2           2472   \n",
       "1099               3         1                1           2042   \n",
       "\n",
       "      NumCompaniesWorked  ...  JobRole_Manager  \\\n",
       "0                      1  ...            False   \n",
       "1                      7  ...            False   \n",
       "2                      1  ...            False   \n",
       "3                      1  ...            False   \n",
       "4                      1  ...            False   \n",
       "...                  ...  ...              ...   \n",
       "1095                   3  ...            False   \n",
       "1096                   2  ...            False   \n",
       "1097                   4  ...            False   \n",
       "1098                   1  ...            False   \n",
       "1099                   6  ...            False   \n",
       "\n",
       "      JobRole_Manufacturing Director  JobRole_Research Director  \\\n",
       "0                               True                      False   \n",
       "1                               True                      False   \n",
       "2                              False                      False   \n",
       "3                               True                      False   \n",
       "4                              False                      False   \n",
       "...                              ...                        ...   \n",
       "1095                           False                      False   \n",
       "1096                           False                      False   \n",
       "1097                           False                      False   \n",
       "1098                           False                      False   \n",
       "1099                           False                      False   \n",
       "\n",
       "      JobRole_Research Scientist  JobRole_Sales Executive  \\\n",
       "0                          False                    False   \n",
       "1                          False                    False   \n",
       "2                          False                    False   \n",
       "3                          False                    False   \n",
       "4                          False                    False   \n",
       "...                          ...                      ...   \n",
       "1095                       False                    False   \n",
       "1096                       False                    False   \n",
       "1097                       False                     True   \n",
       "1098                        True                    False   \n",
       "1099                        True                    False   \n",
       "\n",
       "      JobRole_Sales Representative  MaritalStatus_Divorced  \\\n",
       "0                            False                    True   \n",
       "1                            False                    True   \n",
       "2                            False                   False   \n",
       "3                            False                   False   \n",
       "4                            False                    True   \n",
       "...                            ...                     ...   \n",
       "1095                         False                   False   \n",
       "1096                          True                   False   \n",
       "1097                         False                    True   \n",
       "1098                         False                   False   \n",
       "1099                         False                   False   \n",
       "\n",
       "      MaritalStatus_Married  MaritalStatus_Single  OverTime_Yes  \n",
       "0                     False                 False         False  \n",
       "1                     False                 False         False  \n",
       "2                     False                  True          True  \n",
       "3                      True                 False         False  \n",
       "4                     False                 False         False  \n",
       "...                     ...                   ...           ...  \n",
       "1095                   True                 False          True  \n",
       "1096                   True                 False          True  \n",
       "1097                  False                 False         False  \n",
       "1098                  False                  True          True  \n",
       "1099                   True                 False          True  \n",
       "\n",
       "[1100 rows x 49 columns]"
      ],
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Attrition</th>\n",
       "      <th>Age</th>\n",
       "      <th>DistanceFromHome</th>\n",
       "      <th>Education</th>\n",
       "      <th>EnvironmentSatisfaction</th>\n",
       "      <th>JobInvolvement</th>\n",
       "      <th>JobLevel</th>\n",
       "      <th>JobSatisfaction</th>\n",
       "      <th>MonthlyIncome</th>\n",
       "      <th>NumCompaniesWorked</th>\n",
       "      <th>...</th>\n",
       "      <th>JobRole_Manager</th>\n",
       "      <th>JobRole_Manufacturing Director</th>\n",
       "      <th>JobRole_Research Director</th>\n",
       "      <th>JobRole_Research Scientist</th>\n",
       "      <th>JobRole_Sales Executive</th>\n",
       "      <th>JobRole_Sales Representative</th>\n",
       "      <th>MaritalStatus_Divorced</th>\n",
       "      <th>MaritalStatus_Married</th>\n",
       "      <th>MaritalStatus_Single</th>\n",
       "      <th>OverTime_Yes</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>37</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>5993</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0</td>\n",
       "      <td>54</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>10502</td>\n",
       "      <td>7</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>34</td>\n",
       "      <td>7</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>6074</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0</td>\n",
       "      <td>39</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>12742</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1</td>\n",
       "      <td>28</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>2596</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1095</th>\n",
       "      <td>0</td>\n",
       "      <td>35</td>\n",
       "      <td>23</td>\n",
       "      <td>4</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>4014</td>\n",
       "      <td>3</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1096</th>\n",
       "      <td>0</td>\n",
       "      <td>38</td>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "      <td>5405</td>\n",
       "      <td>2</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1097</th>\n",
       "      <td>0</td>\n",
       "      <td>37</td>\n",
       "      <td>16</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>6334</td>\n",
       "      <td>4</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1098</th>\n",
       "      <td>1</td>\n",
       "      <td>22</td>\n",
       "      <td>7</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>2472</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1099</th>\n",
       "      <td>1</td>\n",
       "      <td>26</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2042</td>\n",
       "      <td>6</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1100 rows × 49 columns</p>\n",
       "</div>"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 8
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-07-20T06:44:45.934151Z",
     "start_time": "2025-07-20T06:44:45.930581Z"
    }
   },
   "cell_type": "code",
   "source": [
    "# Income growth rate since joining = PercentSalaryHike / YearsAtCompany.\n",
    "# A YearsAtCompany of 0 divides by zero and produces inf; map that to 0 here so the\n",
    "# column is finite as soon as it is created.\n",
    "df2['入职收入增长率'] = (df2['PercentSalaryHike'] / df2['YearsAtCompany']).replace([np.inf, -np.inf], 0)"
   ],
   "id": "294c487f34c52930",
   "outputs": [],
   "execution_count": 9
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-07-20T06:44:46.089219Z",
     "start_time": "2025-07-20T06:44:46.085347Z"
    }
   },
   "cell_type": "code",
   "source": [
    "# Job-change frequency = NumCompaniesWorked / TotalWorkingYears.\n",
    "# A TotalWorkingYears of 0 yields inf (n / 0) or NaN (0 / 0); treat both as a\n",
    "# frequency of 0 so no non-finite value is left in the feature.\n",
    "df2['换工作频率'] = (\n",
    "    (df2['NumCompaniesWorked'] / df2['TotalWorkingYears'])\n",
    "    .replace([np.inf, -np.inf], np.nan)\n",
    "    .fillna(0)\n",
    ")"
   ],
   "id": "e9950883346caed9",
   "outputs": [],
   "execution_count": 10
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-07-20T06:44:46.277441Z",
     "start_time": "2025-07-20T06:44:46.273313Z"
    }
   },
   "cell_type": "code",
   "source": [
    "# Share of company tenure spent in the current role = YearsInCurrentRole / YearsAtCompany\n",
    "df2['在岗时间比例'] = df2['YearsInCurrentRole'].div(df2['YearsAtCompany'])"
   ],
   "id": "999b87bdd7b3cc54",
   "outputs": [],
   "execution_count": 11
  },
  {
   "metadata": {},
   "cell_type": "code",
   "source": [
    "# NaN in the ratio (e.g. 0 / 0 when YearsAtCompany is 0) counts as 0.\n",
    "# Assign the result back: fillna(inplace=True) on df2[col] operates on an intermediate\n",
    "# Series and is not guaranteed to update df2 (pandas chained assignment / Copy-on-Write).\n",
    "df2['在岗时间比例'] = df2['在岗时间比例'].fillna(0)"
   ],
   "id": "859f488b8ff55d7f",
   "outputs": [],
   "execution_count": null
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-07-20T06:44:46.349898Z",
     "start_time": "2025-07-20T06:44:46.346378Z"
    }
   },
   "cell_type": "code",
   "source": [
    "# Age when starting to work = Age - TotalWorkingYears\n",
    "df2['入职年龄'] = df2['Age'].sub(df2['TotalWorkingYears'])"
   ],
   "id": "11efbec8e992f241",
   "outputs": [],
   "execution_count": 13
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-07-20T06:44:46.404074Z",
     "start_time": "2025-07-20T06:44:46.400245Z"
    }
   },
   "cell_type": "code",
   "source": [
    "# Job stability index = YearsAtCompany / TotalWorkingYears\n",
    "df2['工作稳定指数'] = df2['YearsAtCompany'].div(df2['TotalWorkingYears'])"
   ],
   "id": "c31c0ced8965fb22",
   "outputs": [],
   "execution_count": 14
  },
  {
   "metadata": {},
   "cell_type": "code",
   "source": [
    "# NaN in the stability index (e.g. 0 / 0 when TotalWorkingYears is 0) counts as 1.\n",
    "# Assign the result back: fillna(inplace=True) on df2[col] operates on an intermediate\n",
    "# Series and is not guaranteed to update df2 (pandas chained assignment / Copy-on-Write).\n",
    "df2['工作稳定指数'] = df2['工作稳定指数'].fillna(1)"
   ],
   "id": "abc6dab157f69e98",
   "outputs": [],
   "execution_count": null
  },
  {
   "metadata": {},
   "cell_type": "code",
   "source": [
    "# Promotion stagnation rate = YearsSinceLastPromotion / YearsAtCompany.\n",
    "# NaN (e.g. 0 / 0 when YearsAtCompany is 0) counts as 0. The filled Series is assigned\n",
    "# back instead of calling fillna(inplace=True) on df2[col], which may only touch a copy.\n",
    "df2['晋升停滞率'] = (df2['YearsSinceLastPromotion'] / df2['YearsAtCompany']).fillna(0)"
   ],
   "id": "f47bbb13191590fa",
   "outputs": [],
   "execution_count": null
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-07-20T06:44:46.527635Z",
     "start_time": "2025-07-20T06:44:46.523226Z"
    }
   },
   "cell_type": "code",
   "source": [
    "# Management flag: True when any of the one-hot JobRole columns for Manager,\n",
    "# Manufacturing Director, Research Director or Sales Executive is set.\n",
    "# any(axis=1) states the logical OR explicitly instead of relying on `+` between\n",
    "# boolean columns, and always produces a boolean result.\n",
    "df2['isManager'] = df2[[\n",
    "    'JobRole_Manager',\n",
    "    'JobRole_Manufacturing Director',\n",
    "    'JobRole_Research Director',\n",
    "    'JobRole_Sales Executive',\n",
    "]].any(axis=1)"
   ],
   "id": "e0fc8d802987b994",
   "outputs": [],
   "execution_count": 17
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-07-20T06:44:46.556970Z",
     "start_time": "2025-07-20T06:44:46.551935Z"
    }
   },
   "cell_type": "code",
   "source": [
    "# Overall satisfaction index = (EnvironmentSatisfaction + JobSatisfaction +\n",
    "# RelationshipSatisfaction + WorkLifeBalance) / 4\n",
    "df2['整体满意度指数'] = df2[[\n",
    "    'EnvironmentSatisfaction',\n",
    "    'JobSatisfaction',\n",
    "    'RelationshipSatisfaction',\n",
    "    'WorkLifeBalance',\n",
    "]].sum(axis=1, skipna=False).div(4)"
   ],
   "id": "382ecffbad81056f",
   "outputs": [],
   "execution_count": 18
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-07-20T06:44:46.594336Z",
     "start_time": "2025-07-20T06:44:46.587334Z"
    }
   },
   "cell_type": "code",
   "source": [
    "# Satisfaction gap = JobSatisfaction - EnvironmentSatisfaction\n",
    "df2['满意度差值'] = df2['JobSatisfaction'].sub(df2['EnvironmentSatisfaction'])"
   ],
   "id": "a3d9247bf1b13f78",
   "outputs": [],
   "execution_count": 19
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-07-20T06:55:41.546790Z",
     "start_time": "2025-07-20T06:55:41.530837Z"
    }
   },
   "cell_type": "code",
   "source": "df2",
   "id": "aed1a75164fb1746",
   "outputs": [
    {
     "data": {
      "text/plain": [
       "      Attrition  Age  DistanceFromHome  Education  EnvironmentSatisfaction  \\\n",
       "0             0   37                 1          4                        1   \n",
       "1             0   54                 1          4                        4   \n",
       "2             1   34                 7          3                        1   \n",
       "3             0   39                 1          1                        4   \n",
       "4             1   28                 1          3                        1   \n",
       "...         ...  ...               ...        ...                      ...   \n",
       "1095          0   35                23          4                        3   \n",
       "1096          0   38                 2          4                        2   \n",
       "1097          0   37                16          4                        4   \n",
       "1098          1   22                 7          1                        4   \n",
       "1099          1   26                 2          3                        1   \n",
       "\n",
       "      JobInvolvement  JobLevel  JobSatisfaction  MonthlyIncome  \\\n",
       "0                  2         2                3           5993   \n",
       "1                  3         3                3          10502   \n",
       "2                  1         2                3           6074   \n",
       "3                  2         4                4          12742   \n",
       "4                  2         1                2           2596   \n",
       "...              ...       ...              ...            ...   \n",
       "1095               3         1                1           4014   \n",
       "1096               1         2                4           5405   \n",
       "1097               2         2                3           6334   \n",
       "1098               3         1                2           2472   \n",
       "1099               3         1                1           2042   \n",
       "\n",
       "      NumCompaniesWorked  ...  OverTime_Yes    入职收入增长率     换工作频率    在岗时间比例  \\\n",
       "0                      1  ...         False   2.571429  0.142857  0.714286   \n",
       "1                      7  ...         False   3.400000  0.212121  0.800000   \n",
       "2                      1  ...          True   2.666667  0.111111  0.777778   \n",
       "3                      1  ...         False   0.761905  0.047619  0.285714   \n",
       "4                      1  ...         False  15.000000  1.000000  0.000000   \n",
       "...                  ...  ...           ...        ...       ...       ...   \n",
       "1095                   3  ...          True   7.500000  0.750000  1.000000   \n",
       "1096                   2  ...          True   5.000000  0.100000  0.500000   \n",
       "1097                   4  ...         False  19.000000  0.444444  0.000000   \n",
       "1098                   1  ...          True  23.000000  1.000000  0.000000   \n",
       "1099                   6  ...          True   4.666667  1.000000  0.666667   \n",
       "\n",
       "      入职年龄    工作稳定指数     晋升停滞率  isManager  整体满意度指数  满意度差值  \n",
       "0       30  1.000000  0.000000       True     2.75      2  \n",
       "1       21  0.151515  0.200000       True     2.25     -1  \n",
       "2       25  1.000000  0.000000      False     2.75      2  \n",
       "3       18  1.000000  0.523810       True     3.50      0  \n",
       "4       27  1.000000  0.000000      False     1.75      1  \n",
       "...    ...       ...       ...        ...      ...    ...  \n",
       "1095    31  0.500000  1.000000      False     2.50     -2  \n",
       "1096    18  0.200000  0.000000      False     2.25      2  \n",
       "1097    28  0.111111  0.000000       True     3.50     -1  \n",
       "1098    21  1.000000  0.000000      False     2.50     -2  \n",
       "1099    20  0.500000  0.333333      False     1.75      0  \n",
       "\n",
       "[1100 rows x 58 columns]"
      ],
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Attrition</th>\n",
       "      <th>Age</th>\n",
       "      <th>DistanceFromHome</th>\n",
       "      <th>Education</th>\n",
       "      <th>EnvironmentSatisfaction</th>\n",
       "      <th>JobInvolvement</th>\n",
       "      <th>JobLevel</th>\n",
       "      <th>JobSatisfaction</th>\n",
       "      <th>MonthlyIncome</th>\n",
       "      <th>NumCompaniesWorked</th>\n",
       "      <th>...</th>\n",
       "      <th>OverTime_Yes</th>\n",
       "      <th>入职收入增长率</th>\n",
       "      <th>换工作频率</th>\n",
       "      <th>在岗时间比例</th>\n",
       "      <th>入职年龄</th>\n",
       "      <th>工作稳定指数</th>\n",
       "      <th>晋升停滞率</th>\n",
       "      <th>isManager</th>\n",
       "      <th>整体满意度指数</th>\n",
       "      <th>满意度差值</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>37</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>5993</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>2.571429</td>\n",
       "      <td>0.142857</td>\n",
       "      <td>0.714286</td>\n",
       "      <td>30</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>True</td>\n",
       "      <td>2.75</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0</td>\n",
       "      <td>54</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>10502</td>\n",
       "      <td>7</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>3.400000</td>\n",
       "      <td>0.212121</td>\n",
       "      <td>0.800000</td>\n",
       "      <td>21</td>\n",
       "      <td>0.151515</td>\n",
       "      <td>0.200000</td>\n",
       "      <td>True</td>\n",
       "      <td>2.25</td>\n",
       "      <td>-1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>34</td>\n",
       "      <td>7</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>6074</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>True</td>\n",
       "      <td>2.666667</td>\n",
       "      <td>0.111111</td>\n",
       "      <td>0.777778</td>\n",
       "      <td>25</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>False</td>\n",
       "      <td>2.75</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0</td>\n",
       "      <td>39</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>12742</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>0.761905</td>\n",
       "      <td>0.047619</td>\n",
       "      <td>0.285714</td>\n",
       "      <td>18</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.523810</td>\n",
       "      <td>True</td>\n",
       "      <td>3.50</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1</td>\n",
       "      <td>28</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>2596</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>15.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>27</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>False</td>\n",
       "      <td>1.75</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1095</th>\n",
       "      <td>0</td>\n",
       "      <td>35</td>\n",
       "      <td>23</td>\n",
       "      <td>4</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>4014</td>\n",
       "      <td>3</td>\n",
       "      <td>...</td>\n",
       "      <td>True</td>\n",
       "      <td>7.500000</td>\n",
       "      <td>0.750000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>31</td>\n",
       "      <td>0.500000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>False</td>\n",
       "      <td>2.50</td>\n",
       "      <td>-2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1096</th>\n",
       "      <td>0</td>\n",
       "      <td>38</td>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "      <td>5405</td>\n",
       "      <td>2</td>\n",
       "      <td>...</td>\n",
       "      <td>True</td>\n",
       "      <td>5.000000</td>\n",
       "      <td>0.100000</td>\n",
       "      <td>0.500000</td>\n",
       "      <td>18</td>\n",
       "      <td>0.200000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>False</td>\n",
       "      <td>2.25</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1097</th>\n",
       "      <td>0</td>\n",
       "      <td>37</td>\n",
       "      <td>16</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>6334</td>\n",
       "      <td>4</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>19.000000</td>\n",
       "      <td>0.444444</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>28</td>\n",
       "      <td>0.111111</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>True</td>\n",
       "      <td>3.50</td>\n",
       "      <td>-1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1098</th>\n",
       "      <td>1</td>\n",
       "      <td>22</td>\n",
       "      <td>7</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>2472</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>True</td>\n",
       "      <td>23.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>21</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>False</td>\n",
       "      <td>2.50</td>\n",
       "      <td>-2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1099</th>\n",
       "      <td>1</td>\n",
       "      <td>26</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2042</td>\n",
       "      <td>6</td>\n",
       "      <td>...</td>\n",
       "      <td>True</td>\n",
       "      <td>4.666667</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.666667</td>\n",
       "      <td>20</td>\n",
       "      <td>0.500000</td>\n",
       "      <td>0.333333</td>\n",
       "      <td>False</td>\n",
       "      <td>1.75</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1100 rows × 58 columns</p>\n",
       "</div>"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 32
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-07-20T06:55:39.895943Z",
     "start_time": "2025-07-20T06:55:39.890849Z"
    }
   },
   "cell_type": "code",
   "source": [
    "# Clean up the ratio features whose denominator can be 0.\n",
    "# Income growth rate: a zero denominator gives inf, which is reset to 0.\n",
    "df2['入职收入增长率'] = df2['入职收入增长率'].replace([np.inf, -np.inf], 0)\n",
    "# Job-change frequency: a zero denominator gives inf (n / 0) or NaN (0 / 0);\n",
    "# both are reset to 0 so no non-finite value remains.\n",
    "df2['换工作频率'] = df2['换工作频率'].replace([np.inf, -np.inf], np.nan).fillna(0)"
   ],
   "id": "f899b4863b2b2ca9",
   "outputs": [],
   "execution_count": 31
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
